knitr::opts_chunk$set(echo=TRUE, include=TRUE, message=TRUE,
warning=FALSE, paged.print=FALSE, fig.width = 7)
##Important!! Enter credential information. Include last "/" of the baseurl
##ENTER BASE URL, USERNAME, PASSWORD
baseurl<-params$baseurl
username<-params$username
##Load required packages
required_packages<-c("tidyverse","httr","assertthat","readr","knitr",
"tidyr","jsonlite", "keyring", "lubridate","survival","survminer",
"GGally","ggfortify")
is_installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
load_or_install<-function(required_packages) {
for(package_name in required_packages) {
if(!is_installed(package_name)) {
install.packages(package_name) }
library(package_name,character.only=TRUE,quietly=TRUE,verbose=FALSE)
}
}
load_or_install(required_packages)
The WHO HIV Case Based surveillance metadata package can be used to capture patient-level data on ART regimens over time, including date of ART initiation and patient status. However, DHIS2 analytics are not advanced enough to build Cox Proportional Hazards models, or survival curves like below. These can be done in R with the survminer and survival packages.
ggsurvplot(
fit = survfit(Surv(time, status) ~ 1, data = lung),
xlab = "Days",
ylab = "Overall survival probability",
title="Generic survival curve")
This demo will show how to download the DHIS2 Event Reports required to build survival curves in an RMarkdown report.
Data have been collected from a demo DHIS2 database that uses the WHO metadata package for HIV case surveillance.
This event report shows the latest Visit stage for all enrollments from the past 5 years and present year.
The variables included are…
Two ways to import the data: download and then read CSV, or import directly through API.
To download the event report, go to Download > CSV
Data from the export could be then read into R for analysis with the readr::read_csv() function.
Trying through API second. On the first time running the script, you will be asked for a password to log-in to the demo system.
Once the keyring package stores and encrypts the password as a local environment variable, you can comment out that password set function Next time you run the script on this machine, you will not need to enter the password.
Using the API in this way allows for more process automation: run the script once, and then execute it routinely with a Cron job to access and publish the latest data.
#youll now be prompted for password
# if you hav already set the password with keyring, you can comment this out
# keyring::key_set("Password", username=username)
##test login
loginDHIS2<-function(baseurl,username,password) {
url<-paste0(baseurl,"api/me")
r<-GET(url,authenticate(username,
keyring::key_get("Password", username=username)))
warn_for_status(r, task="log in")
if(r$status_code == 200L){return(TRUE)}
}
if(loginDHIS2(baseurl, username, password)==TRUE){
print("successfully logged in")
}else{
stop("could not log in! Please check url, username and password")
}
## [1] "successfully logged in"
The API request url from the event report is found when going to Download > HTML . Simply change the link’s extension from .html+css to .csv
More details about the DHIS2 API can be found in the DHIS2 Developer Guide
Read CSV will show the columns imported and inferred column types
url<-paste0(baseurl,
"api/29/analytics/enrollments/query/Xh88p1nyefp.csv?", #enrollment query in CSV
"dimension=pe:THIS_YEAR;LAST_5_YEARS", #this year and last 5
"&dimension=ou:USER_ORGUNIT",#user orgunit
"&dimension=Jt68iauILtD", #Gender attribute
"&dimension=ang4CLldbIu.JyeF0Utlhfp:GT:1&", #days since ART > 1 (follow up visit)
"&dimension=ang4CLldbIu.rcUsYgOnlyF", # latest status
"&stage=ang4CLldbIu&displayProperty=NAME", #identify stage and the displayName
"&tableLayout=true&dataIdScheme=NAME", #use name of DE
"&columns=pe;ou;Jt68iauILtD;JyeF0Utlhfp;rcUsYgOnlyF", #defines layout
"&rows=pe;ou;Jt68iauILtD;JyeF0Utlhfp;rcUsYgOnlyF&paging=false") # turn off paging
# function to fetch data from API
dta<-read_csv(httr::content(GET(url)))
##
## -- Column specification --------------------------------------------------------
## cols(
## Enrollment = col_character(),
## `Tracked entity instance` = col_character(),
## `Enrollment date` = col_datetime(format = ""),
## `Incident date` = col_datetime(format = ""),
## Geometry = col_logical(),
## Longitude = col_double(),
## Latitude = col_double(),
## `Organisation unit name` = col_character(),
## `Organisation unit code` = col_character(),
## `Organisation unit` = col_character(),
## `Gender M, F, TG` = col_character(),
## `HIV - Days Since Treatment Start` = col_double(),
## `HIV - Treatment Status` = col_character()
## )
When cleaning the data, observations are considered CENSORED if the latest visit status was On ART or Transferred Out. That means the patient did not die or stop treatment before the end of the observation period.
CS_data<-dta %>%
janitor::clean_names() %>% ## clean variable names
select(contains("enrollment"), contains("gender") ,contains("treatment")) %>% ## select vars
rename("gender"=3, "days_since_start"=4,"status"=5) %>% ## rename variables
mutate(censored=if_else(str_detect(status,c("ART|Transfer")), 1, 2)) %>% # censor iff last status On ART or transfer, otherwise status implies death/ltfu
na.omit() ## remove NA
##summarize data
CS_data %>%
select(status,enrollment_date,days_since_start) %>%
# split(.$status) %>%
map(summary)
## $status
## Length Class Mode
## 9113 character character
##
## $enrollment_date
## Min. 1st Qu. Median
## "2018-12-02 00:00:00" "2019-07-09 00:00:00" "2019-12-02 00:00:00"
## Mean 3rd Qu. Max.
## "2019-12-22 04:04:08" "2020-05-16 00:00:00" "2021-04-20 00:00:00"
##
## $days_since_start
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.0 260.0 494.0 447.4 629.0 1027.0
##count by latest status
CS_data %>%
count(status) %>%
kable() #format results
| status | n |
|---|---|
| Death (documented) | 360 |
| Lost to follow up | 277 |
| On ART | 7730 |
| Refused (stopped) treatment | 230 |
| Transferred out | 516 |
## histogram
CS_data %>%
ggplot()+
geom_histogram(aes(days_since_start))+
facet_wrap(~status) + ## histogram by status
labs(title="Histogram by Last Visit Status")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Basic model without covariates, basic plot.
km_fit<-survfit(Surv(days_since_start, censored) ~ 1,
data=CS_data,
type="kaplan-meier")
km_fit
## Call: survfit(formula = Surv(days_since_start, censored) ~ 1, data = CS_data,
## type = "kaplan-meier")
##
## n events median 0.95LCL 0.95UCL
## 9113 867 1027 NA NA
plot(km_fit)
km_fit1<-survfit(Surv(days_since_start, censored) ~ gender,
data=CS_data,
type="kaplan-meier")
km_fit1
## Call: survfit(formula = Surv(days_since_start, censored) ~ gender,
## data = CS_data, type = "kaplan-meier")
##
## n events median 0.95LCL 0.95UCL
## gender=Female 5317 520 1027 NA NA
## gender=Male 3601 347 1027 NA NA
## gender=Transgender 195 0 NA NA NA
# summary(km_fit1)
the Kaplan-Meier model is plotted with the survminer R package.
Code adapted from: http://www.sthda.com/english/wiki/survminer-0-3-0
# fit <- survfit(Surv(time, status) ~ sex, data = lung)
plotObj<-ggsurvplot(km_fit1, data = CS_data,
title = "HIV Survival on ART",
pval = TRUE, pval.method = TRUE, # Add p-value & method name
surv.median.line = "hv", # Add median survival lines
legend.title = "Gender", # Change legend titles
palette = "lancet", # Use JCO journal color palette
risk.table = TRUE, # Add No at risk table
# cumevents = TRUE, # Add cumulative No of events table
tables.height = 0.2, # Specify tables height
tables.theme = theme_cleantable(), # Clean theme for tables
tables.y.text = FALSE, # Hide tables y axis text
xlab = "Days",
conf.int = TRUE
# xlim = c(0,1000),
# ylim = c(0.5,1)
)
# Now save the plot image
ggsave("survivalplot.png", print(plotObj))
## Saving 7 x 5 in image
plotObj
survdiff(Surv(days_since_start, censored) ~ gender,
data=CS_data)
## Call:
## survdiff(formula = Surv(days_since_start, censored) ~ gender,
## data = CS_data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## gender=Female 5317 520 496.8 1.08812 2.7789
## gender=Male 3601 347 348.6 0.00699 0.0128
## gender=Transgender 195 0 21.7 21.68778 22.6853
##
## Chisq= 23.3 on 2 degrees of freedom, p= 9e-06
broom::tidy(
coxph(Surv(days_since_start, censored) ~ gender,
data = CS_data),
exp = TRUE) %>%
kable()
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| genderMale | 0.9500982 | 0.0693246 | -0.7384090 | 0.4602660 |
| genderTransgender | 0.0000001 | 642.6196424 | -0.0249996 | 0.9800552 |
Knitr to turn RMarkdown doc into an HTML webpage, PDF, or slide deckOr… import CSV from DHIS2 into your choice of software e.g. STATA, SPSS
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggfortify_0.4.11 GGally_2.1.1 survminer_0.4.9 ggpubr_0.4.0
## [5] survival_3.2-7 lubridate_1.7.9.2 keyring_1.1.0 jsonlite_1.7.2
## [9] knitr_1.31 assertthat_0.2.1 httr_1.4.2 forcats_0.5.1
## [13] stringr_1.4.0 dplyr_1.0.3 purrr_0.3.4 readr_1.4.0
## [17] tidyr_1.1.2 tibble_3.0.5 ggplot2_3.3.3 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] ggtext_0.1.1 fs_1.5.0 RColorBrewer_1.1-2 ggsci_2.9
## [5] tools_4.0.2 backports_1.2.1 utf8_1.2.1 R6_2.5.0
## [9] DBI_1.1.1 colorspace_2.0-0 withr_2.4.2 tidyselect_1.1.1
## [13] gridExtra_2.3 curl_4.3 compiler_4.0.2 cli_2.5.0
## [17] rvest_0.3.6 xml2_1.3.2 labeling_0.4.2 scales_1.1.1
## [21] survMisc_0.5.5 digest_0.6.27 foreign_0.8-81 rmarkdown_2.6
## [25] rio_0.5.26 pkgconfig_2.0.3 htmltools_0.5.1.1 highr_0.9
## [29] dbplyr_2.0.0 rlang_0.4.10 readxl_1.3.1 rstudioapi_0.13
## [33] generics_0.1.0 farver_2.1.0 zoo_1.8-8 zip_2.1.1
## [37] car_3.0-10 magrittr_2.0.1 Matrix_1.2-18 Rcpp_1.0.6
## [41] munsell_0.5.0 fansi_0.4.2 abind_1.4-5 lifecycle_1.0.0
## [45] stringi_1.5.3 yaml_2.2.1 snakecase_0.11.0 carData_3.0-4
## [49] plyr_1.8.6 grid_4.0.2 crayon_1.4.1 lattice_0.20-41
## [53] haven_2.3.1 splines_4.0.2 gridtext_0.1.4 hms_1.0.0
## [57] pillar_1.6.0 markdown_1.1 ggsignif_0.6.1 reprex_1.0.0
## [61] glue_1.4.2 evaluate_0.14 data.table_1.13.6 modelr_0.1.8
## [65] vctrs_0.3.6 cellranger_1.1.0 gtable_0.3.0 reshape_0.8.8
## [69] km.ci_0.5-2 xfun_0.20 openxlsx_4.2.3 janitor_2.1.0
## [73] xtable_1.8-4 broom_0.7.6 rstatix_0.7.0 KMsurv_0.1-5
## [77] ellipsis_0.3.1